Analysis objectives

Based on the previous results in the chromvar, DE testing, and proportion analysis, it seems reasonable to focus on these cells. The specific quesitons to answer are:

  1. What are the transcription factor activities that vary across conditions
  2. What are the proteins different across conditions
  3. What can we conclude about the transcriptional profiles different across individuals by
suppressPackageStartupMessages(library(scales))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(fgsea))
suppressPackageStartupMessages(library(ggpubr))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(ggrepel))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(msigdbr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(Signac))

GSEA<-function(findmarks, genesets){
  findmarks$gene<-rownames(findmarks)
  genes<-findmarks %>%
    arrange(desc(avg_log2FC)) %>% 
    dplyr::select(gene, avg_log2FC)
  rnk<- deframe(genes)
  genes<- deframe(genes)
  genes<- fgsea(pathways=genesets, stats = genes,eps=FALSE)
  genes <- genes %>%
    as_tibble() %>%
    arrange(desc(NES))
  genes<-list(genes, rnk)
  names(genes)<-c("results","rnk")
  return(genes)
}

#Function augments the existing result table with pathway information, including the entire rank order list of gene hits and NES scores 
GSEATable<-function(GSEAwrap_out,gmt, gseaParam=1, name){
  #doing constants first since those are easy
  finalresults<-GSEAwrap_out[1][[1]]
  #first get the order of genes for each pathway, modified from the fGSEA plot function  
  rnk <- rank(GSEAwrap_out[2][[1]])
  ord <- order(rnk, decreasing = T)
  statsAdj <- GSEAwrap_out[2][[1]][ord]
  statsAdj <- sign(statsAdj) * (abs(statsAdj) ^ gseaParam)
  statsAdj <- statsAdj / max(abs(statsAdj))
  pathwaysetup<-list()
  NES<-list()
  print("start ranking")
  #basically here I just want to get everything into a character vector
  for (i in 1:length(finalresults$pathway)){
    pathway <- unname(as.vector(na.omit(match(gmt[[finalresults$pathway[[i]]]], names(statsAdj)))))
    pathway <- sort(pathway)
    NES[[i]]<-calcGseaStat(statsAdj, selectedStats = pathway,returnAllExtremes = TRUE)
    NES[[i]]<-NES[[i]]$tops
    pathwaysetup[[i]]<-pathway}
  print("done ranking")
  finalresults$rnkorder<-pathwaysetup
  finalresults$NESorder<-NES
  finalresults$ID<-name
  return(finalresults)
}


GSEAbig<-function(listofGSEAtables){
  GSEA<-bind_rows(listofGSEAtables)
  return(GSEA)
}



GSEAEnrichmentPlotComparison<-function(PathwayName, GSEACompOut, returnplot="none"){
  top<-max(unlist(GSEACompOut$rnkorder))
  GSEACompOut<-GSEACompOut[GSEACompOut$pathway==PathwayName,]
  curves<-list()
  for (i in 1:length(rownames(GSEACompOut))){curves[[i]]<-list(c(0,GSEACompOut$rnkorder[[i]],top),c(0,GSEACompOut$NESorder[[i]],0))
  curves[[i]]<-as.data.frame(curves[[i]])
  curves[[i]]$name<-GSEACompOut$ID[[i]]
  names(curves[[i]])<-c("Rank","ES","Name")}
  curves<-bind_rows(curves)
  if(returnplot=="ES"){p<-EnrichmentScorePlot(curves)
  return(p)}
  if(returnplot=="BC"){p<-BarcodePlot(curves)
  return(p)}
  if(returnplot=="BOTH"){p<-ComboPlot(curves, Title = PathwayName, Table=GSEACompOut[GSEACompOut$pathway==PathwayName,c("ID","padj")])
  return(p)}
  return(curves)
}

BarcodePlot<-function(GSEACompPathway,stacked=FALSE){
  if(stacked==TRUE){GSEACompPathway<-split(GSEACompPathway,GSEACompPathway$Name)
  for (i in 1:length(GSEACompPathway)){GSEACompPathway[[i]]$y<-i}
  GSEACompPathway<-bind_rows(GSEACompPathway)
  GSEACompPathway$y<-((-GSEACompPathway$y)+1)
  p<-ggplot(GSEACompPathway,aes(x=Rank, y=y,xend=Rank, yend=y-1,color=Name))+geom_segment()+theme_classic()+theme(axis.title.y=element_blank(),
                                                                                                                  axis.text.y=element_blank(),
                                                                                                                  axis.ticks.y=element_blank())+xlab("Gene Rank") + xlim(-5,max(GSEACompPathway$Rank+100) )
  }else{
    p<-ggplot(GSEACompPathway,aes(x=Rank, y=-1,xend=Rank, yend=1,color=Name))+geom_segment()+theme_classic()
  }
  return(p)}


EnrichmentScorePlot<-function(GSEACompPathway){
  p<-ggplot(GSEACompPathway, aes(x=Rank, y=ES, color=Name))+geom_line(size=1.5)+geom_hline(yintercept = 0)+theme_classic()+theme(axis.title.x=element_blank(),
                                                                                                                                 axis.text.x=element_blank(),
                                                                                                                                 axis.ticks.x=element_blank())+ylab("Enrichment Score")+xlim(-5,max(GSEACompPathway$Rank+100) )
  return(p)}
ComboPlot<-function(GSEACompPathway, Title="Enrichment Plot",Table=NA){
  EP<-EnrichmentScorePlot(GSEACompPathway)
  if(!is.na(Table)){EP<-EP+annotation_custom(tableGrob(Table, theme = ttheme_minimal(), rows = NULL, cols = c("Name","q Value")), xmin = 10000, ymin = 0.60)}
  BC<-BarcodePlot(GSEACompPathway, stacked=TRUE)
  p<-ggarrange(EP,BC, common.legend = TRUE, ncol = 1, heights = c(0.6,0.25), align = "v", labels = Title)
  
  return(p)}


Meanenrichmentplot<-function(GSEAmain,greplist=c(),pathwaytodo=""){
    final<-list()
    IDssmooth<-paste(greplist, "smooth")
    IDsgray<-paste(greplist, "rough")
    #for each group, go through and make a smooth track 
    for(i in 1:length(greplist)){
    GSEAres<-GSEAmain[grepl(greplist[[i]], GSEAmain$ID) & grepl(pathwaytodo, GSEAmain$pathway),]
    GSEAres$split<-IDsgray[[i]]
    maximum<-max(unlist(GSEAres$rnkorder))
    #generate bins of size 5 (helps with smoothing)
    bins<-seq(1,maximum ,5)
    bins<-c(bins, maximum,maximum)
    res<-list()
    #average anypoint within those bins
    for (j in 1:(length(bins)-1)){
      res[[j]]<-mean(unlist(GSEAres$NESorder)[unlist(GSEAres$rnkorder)>bins[[j]]& unlist(GSEAres$rnkorder)<=bins[[j+1]]])
    }
    res<-c(res, 0)
    #remove any bin that does not have a point 
    bins<-bins[!is.na(res)]
    res<-res[!is.na(res)]
    #make a new track, add it to the overall 
    res<-list(GSEAres$pathway[[1]], NA, NA, NA, NA, NA, NA, NA, list(bins), list(res), "smooth", IDssmooth[[i]])
    GSEAres<-rbind(GSEAres, res)
    GSEAres$NESorder<-lapply(GSEAres$NESorder, as.double)
    curves<-list()
    #generate coordinate curves for each track including the smooth 
    for (j in 1:length(rownames(GSEAres))){curves[[j]]<-list(c(0,GSEAres$rnkorder[[j]],maximum),c(0,GSEAres$NESorder[[j]],0))
    curves[[j]]<-as.data.frame(curves[[j]])
    curves[[j]]$name<-GSEAres$ID[[j]]
    curves[[j]]$split<-GSEAres$split[[j]]
    names(curves[[j]])<-c("Rank","ES","Name","split")}
    final[[i]]<-bind_rows(curves)
    }
    #merge all the groups together
  final<-bind_rows(final)
  #plot things
  final<-ggplot(final[!grepl("smooth", final$split),],aes(x=Rank, y=ES, group=Name,color=split))+geom_line(size=0.25)+geom_hline(yintercept = 0) + 
    geom_smooth(data = final[grepl("smooth", final$split),], aes(x=Rank, y=ES, group=split, color=split), size=1.5, method = "gam")+theme_classic()+
    ggtitle(pathwaytodo)+ylab("Enrichment Score")+xlim(-5,max(maximum+100))
  
  return(final)}
 


GSEATable<-function(GSEAwrap_out,gmt, gseaParam=1, name){
  #doing constants first since those are easy
  finalresults<-GSEAwrap_out[1][[1]]
  #first get the order of genes for each pathway, modified from the fGSEA plot function  
  rnk <- rank(GSEAwrap_out[2][[1]])
  ord <- order(rnk, decreasing = T)
  statsAdj <- GSEAwrap_out[2][[1]][ord]
  statsAdj <- sign(statsAdj) * (abs(statsAdj) ^ gseaParam)
  statsAdj <- statsAdj / max(abs(statsAdj))
  pathwaysetup<-list()
  NES<-list()
  print("start ranking")
  #basically here I just want to get everything into a character vector
  for (i in 1:length(finalresults$pathway)){
    pathway <- unname(as.vector(na.omit(match(gmt[[finalresults$pathway[[i]]]], names(statsAdj)))))
    pathway <- sort(pathway)
    NES[[i]]<-calcGseaStat(statsAdj, selectedStats = pathway,returnAllExtremes = TRUE)
    NES[[i]]<-NES[[i]]$tops
    pathwaysetup[[i]]<-pathway}
  print("done ranking")
  finalresults$rnkorder<-pathwaysetup
  finalresults$NESorder<-NES
  finalresults$ID<-name
  return(finalresults)
}

FixMotifID<-function(markeroutput, seuratobj){
  markeroutput$genes<-ConvertMotifID(seuratobj, assay="ATAC", id=rownames(markeroutput))
  return(markeroutput)
}


GSEAbig<-function(listofGSEAtables){
  GSEA<-bind_rows(listofGSEAtables)
  return(GSEA)
}

VolPlot<-function(results,top=TRUE, adthresh=TRUE, thresh=0.2, threshn=30, Title=NA){
  results$value<-(-log10(results$p_val_adj))
  results$value[results$value==Inf]<-300
  results$gene<-rownames(results)
  p<-ggplot(results,aes(x=avg_log2FC, y=value, label=gene))+geom_point()+theme_classic()+xlab("Average Log Fold Change")+ylab("-log10 adjusted p value")+theme(plot.title = element_text(hjust = 0.5))
  if(adthresh==TRUE){thresh<-AdaptiveThreshold(results, Tstart=thresh,n=threshn)}
  if(top==TRUE){
    p<- p + geom_text_repel(aes(label=ifelse((avg_log2FC>thresh|avg_log2FC<(-thresh))&value>(-log10(0.01)) ,as.character(gene),'')),hjust=1,vjust=1)
  }
  if(!is.na(Title)){
    p<-p+ggtitle(Title)
  }
  return(p)
}
#quick selection of top n genes basically
AdaptiveThreshold<-function(results, n=30,Tstart=0.25){
  m=Inf
  while(n<=m){
    Tstart=Tstart+0.05
    m<-length(rownames(results)[(results$avg_log2FC>Tstart|results$avg_log2FC<(-Tstart))&results$value>10])
  }
  return(Tstart)
}

#borrowing some rushmore colors from the wes anderson color pack
BottleRocket2 = c("#FAD510", "#CB2314", "#273046", "#354823", "#1E1E1E")

#takes in enrichr output
PlotEnrichment<-function(file, topn=10, returnplot=TRUE, title="Significant Pathways"){
  data<-read.delim(file, sep = "\t",header = 1)
  data$value<- -log10(data$Adjusted.P.value)
  #order by most signfiicant 
  data$Term<-factor(data$Term, levels = rev(data$Term))
  data<-data[order(data$value, decreasing = TRUE),]
  #subset to top n 
  data<-data[1:topn,]
  plot<-ggplot(data, aes(x=value, y=Term, fill=ifelse(value>2, "Significant", "Not Significant")))+geom_bar(stat="identity")+xlab("-log10(p_adj)")+ylab("Pathway")+ggtitle(title)+
    theme_classic()+scale_fill_manual(values=c("Significant" = "#CB2314", "Not Significant" = "#1E1E1E"))+ guides(fill=guide_legend(title=""))
  if(returnplot){return(plot)}
  return(data)
}



results<-readRDS("~/gibbs/DOGMAMORPH/Ranalysis/Objects/20230606FinalObj.rds") 
#grabbing hallmark as well as the curated, immune 
m_df_H<- msigdbr(species = "Homo sapiens", category = "H")
m_df_H<- rbind(msigdbr(species = "Homo sapiens", category = "C2"), m_df_H)
m_df_H<- rbind(msigdbr(species = "Homo sapiens", category = "C7"), m_df_H)
fgsea_sets<- m_df_H %>% split(x = .$gene_symbol, f = .$gs_name)

transcription factor activities

The objective is to see if there are specific transcription factors that may be driving differential activity across conditions. We will be comparing at time 0 and time 3 in a nonbias manner (DE) and the graphing specific factors of interest.

Results suggest that JUN/FOS factors are perturbed in this context, along with a few more housekeeping like features like CTCF and YY1. Further analysis should focus on correlating these factors with gene expression (in the whole dataset, as well as within this population, and across conditions) to see if there is a program driven by this change.

results$T_Tp<-paste(results$Treatment, results$Timepoint, sep = "_")
results$T_Tp<-factor(results$T_Tp, levels = c("Methadone_0","Methadone_3","Bup.Nalo_0", "Bup.Nalo_3", "Naltrexone_0", "Naltrexone_3"))
results_cyto<-subset(results, merged_clusters=="Cytotoxic_T")

DefaultAssay(results_cyto)<-"chromvar"
Idents(results_cyto)<-results_cyto$T_Tp

# month 3 comparisons 

Meth_vs_Nal_3<-FindMarkers(results_cyto, "Methadone_3","Naltrexone_3", mean.fxn = rowMeans)
Meth_vs_Bup.Nalo_3<-FindMarkers(results_cyto, "Methadone_3","Bup.Nalo_3", mean.fxn = rowMeans)
Bup.Nalo_vs_Nal_3<-FindMarkers(results_cyto, "Bup.Nalo_3","Naltrexone_3", mean.fxn = rowMeans)

# month 0 comparisons 
Meth_vs_Nal_0<-FindMarkers(results_cyto, "Methadone_0","Naltrexone_0", mean.fxn = rowMeans)
Meth_vs_Bup.Nalo_0<-FindMarkers(results_cyto, "Methadone_0","Bup.Nalo_0", mean.fxn = rowMeans)
Bup.Nalo_vs_Nal_0<-FindMarkers(results_cyto, "Bup.Nalo_0","Naltrexone_0", mean.fxn = rowMeans)


Meth_vs_Nal_3<-FixMotifID(Meth_vs_Nal_3, results_cyto)
Meth_vs_Bup.Nalo_3<-FixMotifID(Meth_vs_Bup.Nalo_3, results_cyto)
Bup.Nalo_vs_Nal_3<-FixMotifID(Bup.Nalo_vs_Nal_3, results_cyto)
Meth_vs_Nal_0<-FixMotifID(Meth_vs_Nal_0, results_cyto)
Meth_vs_Bup.Nalo_0<-FixMotifID(Meth_vs_Bup.Nalo_0, results_cyto)
Bup.Nalo_vs_Nal_0<-FixMotifID(Bup.Nalo_vs_Nal_0, results_cyto)

DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Nal_3)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Bup.Nalo_3)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Bup.Nalo_vs_Nal_3)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Nal_0)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Bup.Nalo_0)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Bup.Nalo_vs_Nal_0)
VlnPlot(results_cyto, "MA1143.1", pt.size = 0.1 )+ggtitle("FOSL1::JUND")

VlnPlot(results_cyto, "MA0095.3", pt.size = 0.1 )+ggtitle("Yy1")

VlnPlot(results_cyto, "MA0139.1", pt.size = 0.1 )+ggtitle("CTCF")

VlnPlot(results_cyto, "MA0656.1", pt.size = 0.1 )+ggtitle("JDP2")

VlnPlot(results_cyto, "MA0605.2", pt.size = 0.1 )+ggtitle("ATF3")

VlnPlot(results_cyto, "MA0840.1", pt.size = 0.1 )+ggtitle("Creb5")

VlnPlot(results_cyto, "MA1145.1", pt.size = 0.1 )+ggtitle("FOSL2::JUND")

VlnPlot(results_cyto, "MA1131.1", pt.size = 0.1 )+ggtitle("FOSL2::JUN")

VlnPlot(results_cyto, "MA1155.1", pt.size = 0.1 )+ggtitle("ZSCAN4")

VlnPlot(results_cyto, "MA0478.1", pt.size = 0.1 )+ggtitle("FOSL2")

VlnPlot(results_cyto, "MA1988.1", pt.size = 0.1 )+ggtitle("Atf3")

VlnPlot(results_cyto, "MA1134.1", pt.size = 0.1 )+ggtitle("FOS::JUNB")

VlnPlot(results_cyto, "MA0489.2", pt.size = 0.1 )+ggtitle("Jun")

VlnPlot(results_cyto, "MA1634.1", pt.size = 0.1 )+ggtitle("BATF")

Idents(results_cyto)<-factor(Idents(results_cyto),levels = c("Methadone_0","Bup.Nalo_0","Naltrexone_0","Methadone_3", "Bup.Nalo_3", "Naltrexone_3"))

VlnPlot(results_cyto, "MA1143.1", pt.size = 0.1 )+ggtitle("FOSL1::JUND")

VlnPlot(results_cyto, "MA0095.3", pt.size = 0.1 )+ggtitle("Yy1")

VlnPlot(results_cyto, "MA0139.1", pt.size = 0.1 )+ggtitle("CTCF")

VlnPlot(results_cyto, "MA0656.1", pt.size = 0.1 )+ggtitle("JDP2")

VlnPlot(results_cyto, "MA0605.2", pt.size = 0.1 )+ggtitle("ATF3")

VlnPlot(results_cyto, "MA0840.1", pt.size = 0.1 )+ggtitle("Creb5")

VlnPlot(results_cyto, "MA1145.1", pt.size = 0.1 )+ggtitle("FOSL2::JUND")

VlnPlot(results_cyto, "MA1131.1", pt.size = 0.1 )+ggtitle("FOSL2::JUN")

VlnPlot(results_cyto, "MA1155.1", pt.size = 0.1 )+ggtitle("ZSCAN4")

VlnPlot(results_cyto, "MA0478.1", pt.size = 0.1 )+ggtitle("FOSL2")

VlnPlot(results_cyto, "MA1988.1", pt.size = 0.1 )+ggtitle("Atf3")

VlnPlot(results_cyto, "MA1134.1", pt.size = 0.1 )+ggtitle("FOS::JUNB")

VlnPlot(results_cyto, "MA0489.2", pt.size = 0.1 )+ggtitle("Jun")

VlnPlot(results_cyto, "MA1634.1", pt.size = 0.1 )+ggtitle("BATF")

Surface protein differences

Doing the same thing as Chormvar but instead we’re looking at protein expression.

As below, this cell type basically has no differences in protein expression across conditions. The only significant factor was CD101, which is reported to impact myeloid differentiation.

DefaultAssay(results_cyto)<-"ADT"
Idents(results_cyto)<-results_cyto$T_Tp

#need to first do CLR by library 

results_cyto_split<-SplitObject(results_cyto, split.by = "orig.ident")

results_cyto_split<-lapply(results_cyto_split, NormalizeData, normalization.method = "CLR")
## Normalizing across features
## Normalizing across features
## Normalizing across features
## Normalizing across features
## Normalizing across features
## Normalizing across features
## Normalizing across features
## Normalizing across features
## Normalizing across features
for (i in 1:9){
  results_cyto_split[[i]]<-results_cyto_split[[i]]@assays$ADT@data
}
results_cyto_split<-do.call(cbind, results_cyto_split)
#put it in but ensure things are correct
results_cyto@assays$ADT@data<-results_cyto_split[,colnames(results_cyto)]

Meth_vs_Nal_3<-FindMarkers(results_cyto, "Methadone_3","Naltrexone_3")
Meth_vs_Bup.Nalo_3<-FindMarkers(results_cyto, "Methadone_3","Bup.Nalo_3")
Bup.Nalo_vs_Nal_3<-FindMarkers(results_cyto, "Bup.Nalo_3","Naltrexone_3")
## Warning in FindMarkers.default(object = data.use, slot = data.slot, counts =
## counts, : No features pass logfc.threshold threshold; returning empty
## data.frame
# month 0 comparisons 
Meth_vs_Nal_0<-FindMarkers(results_cyto, "Methadone_0","Naltrexone_0")
Meth_vs_Bup.Nalo_0<-FindMarkers(results_cyto, "Methadone_0","Bup.Nalo_0")
Bup.Nalo_vs_Nal_0<-FindMarkers(results_cyto, "Bup.Nalo_0","Naltrexone_0")
## Warning in FindMarkers.default(object = data.use, slot = data.slot, counts =
## counts, : No features pass logfc.threshold threshold; returning empty
## data.frame
#Bup.Nalo and Nal are not different at all (0 pass logfc)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Nal_3)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Bup.Nalo_3)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Nal_0)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Bup.Nalo_0)
#only DE one, promyeloid differentiation 
VlnPlot(results_cyto, "CD101-TotalA", pt.size = 0.1 )

Gene expression and GSEA

Standard DE

We’re just using the standard wilcox/mann whitney U test here to look at the impact of these different conditions on gene expression

DefaultAssay(results_cyto)<-"RNA"

# month 3 comparisons 

Meth_vs_Nal_3<-FindMarkers(results_cyto, "Methadone_3","Naltrexone_3")
Meth_vs_Bup.Nalo_3<-FindMarkers(results_cyto, "Methadone_3","Bup.Nalo_3")
Bup.Nalo_vs_Nal_3<-FindMarkers(results_cyto, "Bup.Nalo_3","Naltrexone_3")

# month 0 comparisons 
Meth_vs_Nal_0<-FindMarkers(results_cyto, "Methadone_0","Naltrexone_0")
Meth_vs_Bup.Nalo_0<-FindMarkers(results_cyto, "Methadone_0","Bup.Nalo_0")
Bup.Nalo_vs_Nal_0<-FindMarkers(results_cyto, "Bup.Nalo_0","Naltrexone_0")


DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Nal_3)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Bup.Nalo_3)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Bup.Nalo_vs_Nal_3)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Nal_0)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Meth_vs_Bup.Nalo_0)
DT::datatable(rownames=TRUE, filter="top", class='cell-border stripe', extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')), data=Bup.Nalo_vs_Nal_0)
#volcanos of just those highly differentially expressed
VolPlot(Meth_vs_Nal_3)

VolPlot(Meth_vs_Bup.Nalo_3)
## Warning: ggrepel: 106 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

VolPlot(Bup.Nalo_vs_Nal_3)

VolPlot(Meth_vs_Nal_0)

VolPlot(Meth_vs_Bup.Nalo_0)

VolPlot(Bup.Nalo_vs_Nal_0)
## Warning: ggrepel: 60 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#doing it again but with no cutoffs for FC

# month 3 comparisons 

Meth_vs_Nal_3<-FindMarkers(results_cyto, "Methadone_3","Naltrexone_3", min.pct = -Inf, min.diff.pct = -Inf, logfc.threshold = -Inf)
Meth_vs_Bup.Nalo_3<-FindMarkers(results_cyto, "Methadone_3","Bup.Nalo_3", min.pct = -Inf, min.diff.pct = -Inf, logfc.threshold = -Inf)
Bup.Nalo_vs_Nal_3<-FindMarkers(results_cyto, "Bup.Nalo_3","Naltrexone_3", min.pct = -Inf, min.diff.pct = -Inf, logfc.threshold = -Inf)

# month 0 comparisons 
Meth_vs_Nal_0<-FindMarkers(results_cyto, "Methadone_0","Naltrexone_0", min.pct = -Inf, min.diff.pct = -Inf, logfc.threshold = -Inf)
Meth_vs_Bup.Nalo_0<-FindMarkers(results_cyto, "Methadone_0","Bup.Nalo_0", min.pct = -Inf, min.diff.pct = -Inf, logfc.threshold = -Inf)
Bup.Nalo_vs_Nal_0<-FindMarkers(results_cyto, "Bup.Nalo_0","Naltrexone_0", min.pct = -Inf, min.diff.pct = -Inf, logfc.threshold = -Inf)

#not placing data tables for these very large dataframes as they make the resulting html unusable 

VolPlot(Meth_vs_Nal_3)

VolPlot(Meth_vs_Bup.Nalo_3)
## Warning: ggrepel: 106 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

VolPlot(Bup.Nalo_vs_Nal_3)

VolPlot(Meth_vs_Nal_0)

VolPlot(Meth_vs_Bup.Nalo_0)

VolPlot(Bup.Nalo_vs_Nal_0)
## Warning: ggrepel: 61 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#### GO analysis of DE genes from traditional analyses

Using Enrichr we can look for genesets that are enriched here for each comparison.

In general it doesn’t look that interesting because the pathways involved are focused mainly on those dominated by ribosomal proteins, although there is a subset that interestingly has IL27 signaling.

#outputing the genelists to test 

to_output<-list(Meth_vs_Nal_3,Meth_vs_Bup.Nalo_3,Bup.Nalo_vs_Nal_3,Meth_vs_Nal_0,Meth_vs_Bup.Nalo_0,Bup.Nalo_vs_Nal_0 )
names(to_output)<-c("Meth_vs_Nal_3","Meth_vs_Bup.Nalo_3","Bup.Nalo_vs_Nal_3","Meth_vs_Nal_0","Meth_vs_Bup.Nalo_0","Bup.Nalo_vs_Nal_0")
pcut<-0.05 
LFCcut<-0.25
outdir<-"~/gibbs/DOGMAMORPH/Ranalysis/enrichr/Trad_DE_cyto"
#basically we'll loop through, write the subset of genes based on those cutoffs 
for (i in 1:length(to_output)){
  cur<-to_output[[i]][(to_output[[i]]$avg_log2FC< -LFCcut) | (to_output[[i]]$avg_log2FC> LFCcut),]
  cur$p_val_adj<-p.adjust(cur$p_val, "BH")
  write.table(rownames(cur)[(cur$p_val_adj<pcut)&(cur$avg_log2FC>0)], paste0(outdir,"/", names(to_output)[[i]], "_up.txt"), 
              quote = FALSE, row.names = FALSE, col.names = FALSE)
  write.table(rownames(cur)[(cur$p_val_adj<pcut)&(cur$avg_log2FC<0)], paste0(outdir,"/", names(to_output)[[i]], "_down.txt"), 
              quote = FALSE, row.names = FALSE, col.names = FALSE)
}


todo<-list.files("~/gibbs/DOGMAMORPH/Ranalysis/enrichr/Trad_DE_cyto/Results_tables/")

for (i in todo){
  print(PlotEnrichment(paste0("~/gibbs/DOGMAMORPH/Ranalysis/enrichr/Trad_DE_cyto/Results_tables/",i), topn = 20, title=i))
}

#### GSEA analysis of DE genes from traditional analyses

GSEA is a nice addition particularly in single cell because the singals are weaker due to inherent sparcity and pooling info across genes by ranking can be more powerful.

GSEAres<-list()
for (i in 1:length(to_output)){
GSEAres[[i]]<-GSEA(to_output[[i]], genesets = fgsea_sets)
GSEAres[[i]]<-GSEATable(GSEAwrap_out =GSEAres[[i]], gmt = fgsea_sets, name = names(to_output)[[i]] )}
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (7.32% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "start ranking"
## [1] "done ranking"
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (7.72% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "start ranking"
## [1] "done ranking"
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (7.27% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "start ranking"
## [1] "done ranking"
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (7.74% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "start ranking"
## [1] "done ranking"
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (7.09% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "start ranking"
## [1] "done ranking"
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (6.87% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "start ranking"
## [1] "done ranking"
GSEAres<-GSEAbig(listofGSEAtables = GSEAres)

saveRDS(GSEAres, file="~/gibbs/DOGMAMORPH/Ranalysis/Objects/20230622GSEACtyoTTrad.rds")

pseudobulk comparisons by timepoint

Rationale: a few studies now have shown that this is a more robust way to analyze single cell data because it’s less susceptible to normalization issues around zero. Basically if we instead of a psuedobulk, although the significance goes down because we are no longer having such an inflated N, we do get more meaningful differences.

This is most important to to because the principals of many standard statistical tests assume indpendence, and cells are not independent within a sample.

devtools::session_info()
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## - Session info ---------------------------------------------------------------
##  setting  value
##  version  R version 4.2.0 (2022-04-22)
##  os       Red Hat Enterprise Linux 8.8 (Ootpa)
##  system   x86_64, linux-gnu
##  ui       X11
##  language (EN)
##  collate  C
##  ctype    C
##  tz       Etc/UTC
##  date     2023-06-22
##  pandoc   3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
## 
## - Packages -------------------------------------------------------------------
##  package          * version   date (UTC) lib source
##  abind              1.4-5     2016-07-21 [2] CRAN (R 4.2.0)
##  babelgene          22.9      2022-09-29 [1] CRAN (R 4.2.0)
##  backports          1.4.1     2021-12-13 [2] CRAN (R 4.2.0)
##  beeswarm           0.4.0     2021-06-01 [2] CRAN (R 4.2.0)
##  BiocGenerics       0.44.0    2022-11-01 [1] Bioconductor
##  BiocParallel       1.32.6    2023-03-17 [1] Bioconductor
##  Biostrings         2.66.0    2022-11-01 [1] Bioconductor
##  bitops             1.0-7     2021-04-24 [2] CRAN (R 4.2.0)
##  broom              1.0.4     2023-03-11 [1] CRAN (R 4.2.0)
##  bslib              0.4.2     2022-12-16 [1] CRAN (R 4.2.0)
##  cachem             1.0.8     2023-05-01 [1] CRAN (R 4.2.0)
##  callr              3.7.3     2022-11-02 [1] CRAN (R 4.2.0)
##  car                3.1-2     2023-03-30 [1] CRAN (R 4.2.0)
##  carData            3.0-5     2022-01-06 [2] CRAN (R 4.2.0)
##  cli                3.6.1     2023-03-23 [1] CRAN (R 4.2.0)
##  cluster            2.1.4     2022-08-22 [2] CRAN (R 4.2.0)
##  codetools          0.2-19    2023-02-01 [2] CRAN (R 4.2.0)
##  colorspace         2.1-0     2023-01-23 [2] CRAN (R 4.2.0)
##  cowplot            1.1.1     2020-12-30 [2] CRAN (R 4.2.0)
##  crayon             1.5.2     2022-09-29 [2] CRAN (R 4.2.0)
##  crosstalk          1.2.0     2021-11-04 [2] CRAN (R 4.2.0)
##  data.table         1.14.8    2023-02-17 [2] CRAN (R 4.2.0)
##  DBI                1.1.3     2022-06-18 [2] CRAN (R 4.2.0)
##  deldir             1.0-6     2021-10-23 [2] CRAN (R 4.2.0)
##  devtools           2.4.5     2022-10-11 [1] CRAN (R 4.2.0)
##  digest             0.6.31    2022-12-11 [2] CRAN (R 4.2.0)
##  dplyr            * 1.1.2     2023-04-20 [1] CRAN (R 4.2.0)
##  DT                 0.28      2023-05-18 [1] CRAN (R 4.2.0)
##  ellipsis           0.3.2     2021-04-29 [2] CRAN (R 4.2.0)
##  evaluate           0.20      2023-01-17 [2] CRAN (R 4.2.0)
##  fansi              1.0.4     2023-01-22 [2] CRAN (R 4.2.0)
##  farver             2.1.1     2022-07-06 [2] CRAN (R 4.2.0)
##  fastmap            1.1.1     2023-02-24 [1] CRAN (R 4.2.0)
##  fastmatch          1.1-3     2021-07-23 [2] CRAN (R 4.2.0)
##  fgsea            * 1.24.0    2022-11-01 [1] Bioconductor
##  fitdistrplus       1.1-8     2022-03-10 [2] CRAN (R 4.2.0)
##  fs                 1.6.1     2023-02-06 [2] CRAN (R 4.2.0)
##  future             1.32.0    2023-03-07 [1] CRAN (R 4.2.0)
##  future.apply       1.10.0    2022-11-05 [1] CRAN (R 4.2.0)
##  generics           0.1.3     2022-07-05 [2] CRAN (R 4.2.0)
##  GenomeInfoDb       1.34.9    2023-02-02 [1] Bioconductor
##  GenomeInfoDbData   1.2.9     2023-03-17 [1] Bioconductor
##  GenomicRanges      1.50.2    2022-12-16 [1] Bioconductor
##  ggbeeswarm         0.7.2     2023-04-29 [1] CRAN (R 4.2.0)
##  ggplot2          * 3.4.2     2023-04-03 [1] CRAN (R 4.2.0)
##  ggpubr           * 0.6.0     2023-02-10 [1] CRAN (R 4.2.0)
##  ggrastr            1.0.1     2021-12-08 [1] CRAN (R 4.2.0)
##  ggrepel          * 0.9.3     2023-02-03 [1] CRAN (R 4.2.0)
##  ggridges           0.5.4     2022-09-26 [1] CRAN (R 4.2.0)
##  ggsignif           0.6.4     2022-10-13 [1] CRAN (R 4.2.0)
##  globals            0.16.2    2022-11-21 [1] CRAN (R 4.2.0)
##  glue               1.6.2     2022-02-24 [2] CRAN (R 4.2.0)
##  goftest            1.2-3     2021-10-07 [2] CRAN (R 4.2.0)
##  gridExtra        * 2.3       2017-09-09 [2] CRAN (R 4.2.0)
##  gtable             0.3.3     2023-03-21 [1] CRAN (R 4.2.0)
##  highr              0.10      2022-12-22 [1] CRAN (R 4.2.0)
##  htmltools          0.5.5     2023-03-23 [1] CRAN (R 4.2.0)
##  htmlwidgets        1.6.2     2023-03-17 [1] CRAN (R 4.2.0)
##  httpuv             1.6.9     2023-02-14 [1] CRAN (R 4.2.0)
##  httr               1.4.5     2023-02-24 [1] CRAN (R 4.2.0)
##  ica                1.0-3     2022-07-08 [2] CRAN (R 4.2.0)
##  igraph             1.4.2     2023-04-07 [1] CRAN (R 4.2.0)
##  IRanges            2.32.0    2022-11-01 [1] Bioconductor
##  irlba              2.3.5.1   2022-10-03 [1] CRAN (R 4.2.0)
##  jquerylib          0.1.4     2021-04-26 [2] CRAN (R 4.2.0)
##  jsonlite           1.8.4     2022-12-06 [2] CRAN (R 4.2.0)
##  KernSmooth         2.23-20   2021-05-03 [2] CRAN (R 4.2.0)
##  knitr              1.42      2023-01-25 [1] CRAN (R 4.2.0)
##  labeling           0.4.2     2020-10-20 [2] CRAN (R 4.2.0)
##  later              1.3.0     2021-08-18 [2] CRAN (R 4.2.0)
##  lattice            0.21-8    2023-04-05 [1] CRAN (R 4.2.0)
##  lazyeval           0.2.2     2019-03-15 [2] CRAN (R 4.2.0)
##  leiden             0.4.3     2022-09-10 [1] CRAN (R 4.2.0)
##  lifecycle          1.0.3     2022-10-07 [1] CRAN (R 4.2.0)
##  limma              3.54.2    2023-02-28 [1] Bioconductor
##  listenv            0.9.0     2022-12-16 [2] CRAN (R 4.2.0)
##  lmtest             0.9-40    2022-03-21 [2] CRAN (R 4.2.0)
##  magrittr           2.0.3     2022-03-30 [2] CRAN (R 4.2.0)
##  MASS               7.3-59    2023-04-21 [1] CRAN (R 4.2.0)
##  Matrix             1.5-4     2023-04-04 [1] CRAN (R 4.2.0)
##  matrixStats        0.63.0    2022-11-18 [2] CRAN (R 4.2.0)
##  memoise            2.0.1     2021-11-26 [2] CRAN (R 4.2.0)
##  mime               0.12      2021-09-28 [2] CRAN (R 4.2.0)
##  miniUI             0.1.1.1   2018-05-18 [2] CRAN (R 4.2.0)
##  msigdbr          * 7.5.1     2022-03-30 [1] CRAN (R 4.2.0)
##  munsell            0.5.0     2018-06-12 [2] CRAN (R 4.2.0)
##  nlme               3.1-162   2023-01-31 [1] CRAN (R 4.2.0)
##  parallelly         1.35.0    2023-03-23 [1] CRAN (R 4.2.0)
##  patchwork          1.1.2     2022-08-19 [1] CRAN (R 4.2.0)
##  pbapply            1.7-0     2023-01-13 [1] CRAN (R 4.2.0)
##  pillar             1.9.0     2023-03-22 [1] CRAN (R 4.2.0)
##  pkgbuild           1.4.0     2022-11-27 [1] CRAN (R 4.2.0)
##  pkgconfig          2.0.3     2019-09-22 [2] CRAN (R 4.2.0)
##  pkgload            1.3.2     2022-11-16 [1] CRAN (R 4.2.0)
##  plotly             4.10.1    2022-11-07 [1] CRAN (R 4.2.0)
##  plyr               1.8.8     2022-11-11 [1] CRAN (R 4.2.0)
##  png                0.1-8     2022-11-29 [1] CRAN (R 4.2.0)
##  polyclip           1.10-4    2022-10-20 [1] CRAN (R 4.2.0)
##  prettyunits        1.1.1     2020-01-24 [2] CRAN (R 4.2.0)
##  processx           3.8.1     2023-04-18 [1] CRAN (R 4.2.0)
##  profvis            0.3.8     2023-05-02 [1] CRAN (R 4.2.0)
##  progressr          0.13.0    2023-01-10 [1] CRAN (R 4.2.0)
##  promises           1.2.0.1   2021-02-11 [2] CRAN (R 4.2.0)
##  ps                 1.7.5     2023-04-18 [1] CRAN (R 4.2.0)
##  purrr              1.0.1     2023-01-10 [1] CRAN (R 4.2.0)
##  R6                 2.5.1     2021-08-19 [2] CRAN (R 4.2.0)
##  RANN               2.6.1     2019-01-08 [2] CRAN (R 4.2.0)
##  RColorBrewer       1.1-3     2022-04-03 [2] CRAN (R 4.2.0)
##  Rcpp               1.0.10    2023-01-22 [1] CRAN (R 4.2.0)
##  RcppAnnoy          0.0.20    2022-10-27 [1] CRAN (R 4.2.0)
##  RcppRoll           0.3.0     2018-06-05 [2] CRAN (R 4.2.0)
##  RCurl              1.98-1.12 2023-03-27 [1] CRAN (R 4.2.0)
##  remotes            2.4.2     2021-11-30 [2] CRAN (R 4.2.0)
##  reshape2         * 1.4.4     2020-04-09 [2] CRAN (R 4.2.0)
##  reticulate         1.28      2023-01-27 [1] CRAN (R 4.2.0)
##  rlang              1.1.1     2023-04-28 [1] CRAN (R 4.2.0)
##  rmarkdown          2.22      2023-06-01 [1] CRAN (R 4.2.0)
##  ROCR               1.0-11    2020-05-02 [2] CRAN (R 4.2.0)
##  Rsamtools          2.14.0    2022-11-01 [1] Bioconductor
##  rstatix            0.7.2     2023-02-01 [1] CRAN (R 4.2.0)
##  rstudioapi         0.14      2022-08-22 [1] CRAN (R 4.2.0)
##  Rtsne              0.16      2022-04-17 [2] CRAN (R 4.2.0)
##  S4Vectors          0.36.2    2023-02-26 [1] Bioconductor
##  sass               0.4.5     2023-01-24 [1] CRAN (R 4.2.0)
##  scales           * 1.2.1     2022-08-20 [1] CRAN (R 4.2.0)
##  scattermore        0.8       2022-02-14 [1] CRAN (R 4.2.0)
##  sctransform        0.3.5     2022-09-21 [1] CRAN (R 4.2.0)
##  sessioninfo        1.2.2     2021-12-06 [2] CRAN (R 4.2.0)
##  Seurat           * 4.3.0     2022-11-18 [1] CRAN (R 4.2.0)
##  SeuratObject     * 4.1.3     2022-11-07 [1] CRAN (R 4.2.0)
##  shiny              1.7.4     2022-12-15 [1] CRAN (R 4.2.0)
##  Signac           * 1.9.0     2022-12-08 [1] CRAN (R 4.2.0)
##  sp                 1.6-0     2023-01-19 [1] CRAN (R 4.2.0)
##  spatstat.data      3.0-1     2023-03-12 [1] CRAN (R 4.2.0)
##  spatstat.explore   3.1-0     2023-03-14 [1] CRAN (R 4.2.0)
##  spatstat.geom      3.1-0     2023-03-12 [1] CRAN (R 4.2.0)
##  spatstat.random    3.1-4     2023-03-13 [1] CRAN (R 4.2.0)
##  spatstat.sparse    3.0-1     2023-03-12 [1] CRAN (R 4.2.0)
##  spatstat.utils     3.0-2     2023-03-11 [1] CRAN (R 4.2.0)
##  stringi            1.7.12    2023-01-11 [1] CRAN (R 4.2.0)
##  stringr            1.5.0     2022-12-02 [1] CRAN (R 4.2.0)
##  survival           3.5-5     2023-03-12 [1] CRAN (R 4.2.0)
##  tensor             1.5       2012-05-05 [2] CRAN (R 4.2.0)
##  tibble           * 3.2.1     2023-03-20 [1] CRAN (R 4.2.0)
##  tidyr            * 1.3.0     2023-01-24 [1] CRAN (R 4.2.0)
##  tidyselect         1.2.0     2022-10-10 [1] CRAN (R 4.2.0)
##  urlchecker         1.0.1     2021-11-30 [1] CRAN (R 4.2.0)
##  usethis            2.1.6     2022-05-25 [1] CRAN (R 4.2.0)
##  utf8               1.2.3     2023-01-31 [1] CRAN (R 4.2.0)
##  uwot               0.1.14    2022-08-22 [1] CRAN (R 4.2.0)
##  vctrs              0.6.2     2023-04-19 [1] CRAN (R 4.2.0)
##  vipor              0.4.5     2017-03-22 [2] CRAN (R 4.2.0)
##  viridisLite        0.4.2     2023-05-02 [1] CRAN (R 4.2.0)
##  withr              2.5.0     2022-03-03 [2] CRAN (R 4.2.0)
##  xfun               0.39      2023-04-20 [1] CRAN (R 4.2.0)
##  xtable             1.8-4     2019-04-21 [2] CRAN (R 4.2.0)
##  XVector            0.38.0    2022-11-01 [1] Bioconductor
##  yaml               2.3.7     2023-01-23 [1] CRAN (R 4.2.0)
##  zlibbioc           1.44.0    2022-11-01 [1] Bioconductor
##  zoo                1.8-12    2023-04-13 [1] CRAN (R 4.2.0)
## 
##  [1] /gpfs/gibbs/project/ya-chi_ho/jac369/R/4.2
##  [2] /vast/palmer/apps/avx2/software/R/4.2.0-foss-2020b/lib64/R/library
## 
## ------------------------------------------------------------------------------